# A new post by hswerdfe
# Load the TidyTuesday 2023-03-21 `languages` table directly from GitHub.
# NOTE(review): no library() calls are visible in this script; the unqualified
# dplyr / forcats / stringr / ggplot2 / plotly functions used below presumably
# rely on those packages already being attached -- confirm.
languages <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-03-21/languages.csv')

# Bar data for the faceted chart. Within each GitHub language type, keep the
# 9 languages with the most repositories and pool the rest into "Other", then
# total the repository counts per (type, language) and build a
# "<language> (<count>)" label for every bar.
# NOTE(review): levels are ordered per type and then combined, so where the
# "Other" level ends up relative to each type's languages may differ between
# facets -- check the rendered plot.
p_dat <-
  languages |>
  select(pldb_id, title, github_language_repos, github_language_type) |>
  filter(!is.na(github_language_repos)) |>
  group_by(github_language_type) |>
  mutate(
    pldb_id = pldb_id |>
      # Order levels by repository count so bars sort by size within a type.
      fct_reorder(github_language_repos) |>
      # Weighted lumping: rank languages by repository count, not row count.
      fct_lump_n(n = 9, w = github_language_repos)
  ) |>
  group_by(github_language_type, pldb_id) |>
  # Summing collapses all rows pooled into "Other" into a single bar.
  # 'drop_last' is spelled out (it is the default) to silence the regrouping
  # message and to keep the label step below grouped by language type.
  summarise(
    github_language_repos = sum(github_language_repos),
    .groups = 'drop_last'
  ) |>
  mutate(lbl = glue::glue('{pldb_id} ({str_squish(format(github_language_repos, big.mark = ","))})')) |>
  # Hand back an ungrouped tibble so later steps do not inherit the grouping.
  ungroup()
# One row per GitHub language type, used for the large background caption
# drawn in each facet of the bar chart.
#   github_language_repos_sum  total repositories across the type's bars.
#   github_language_repos      half the longest bar, i.e. the x position of
#                              the caption; it reuses the bar column's name so
#                              the plot layer can map it to x.
#   lbl                        "<type>\n(<total repositories>)".
p_dat_facet <-
  p_dat |>
  group_by(github_language_type) |>
  summarise(
    # Order matters: summarise() evaluates its arguments in sequence, so the
    # total must be taken before the column is overwritten with the midpoint.
    github_language_repos_sum = sum(github_language_repos),
    github_language_repos = max(github_language_repos) / 2
  ) |>
  # The caption reports the type's total; the original formatted the x
  # position (max / 2) by mistake and left the computed sum unused.
  mutate(lbl = glue::glue('{github_language_type}\n({str_squish(format(github_language_repos_sum, big.mark = ","))})'))
# Horizontal bar chart of repository counts, one facet per GitHub language
# type. Axis text, grid lines and facet strips are all hidden; the chart is
# annotated with text layers instead.
ggplot(
  data = p_dat,
  mapping = aes(
    x = github_language_repos,
    y = pldb_id,
    fill = github_language_type,
    color = github_language_type
  )
) +
  facet_wrap(vars(github_language_type), scales = "free", ncol = 1) +
  # Per-bar label, pinned to x = 0 and left-aligned so it starts at the bar's
  # origin.
  geom_text(
    mapping = aes(label = lbl),
    x = 0,
    hjust = "left",
    color = "black"
  ) +
  # Large translucent per-facet caption; x comes from p_dat_facet and y is
  # fixed at position 5 on the discrete axis.
  geom_text(
    data = p_dat_facet,
    mapping = aes(label = lbl, x = github_language_repos),
    y = 5,
    alpha = 0.5,
    size = 10
  ) +
  # Bars are added after both text layers, so they are drawn over the labels;
  # the low alpha keeps the text readable through them.
  geom_col(alpha = 0.25) +
  labs(
    title = "Most Popular Languages",
    subtitle = "by Language Type (as measured by github repos)",
    x = "",
    y = ""
  ) +
  guides(fill = "none", color = "none") +
  theme(
    strip.text = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(
      size = 25, hjust = 0.5, face = "bold", colour = "grey", vjust = -1
    ),
    plot.subtitle = element_text(
      size = 15, hjust = 0.5, face = "bold", colour = "grey", vjust = -1
    ),
    plot.caption = element_text(
      size = 18, hjust = 0.5, face = "italic", color = "black"
    )
  )
# Language titles highlighted in the jobs-vs-users scatter plot. The two
# vectors are concatenated there into a single "known" flag.
I_know_well <- c("Python", "R", "SQL", "SQLite", "PostgreSQL")
have_worked_with <- c(
  "Java", "Visual Basic", "Fortran", "C", "C++", "Perl",
  "MySQL", "Microsoft SQL Server", "XML", "JSON", "JavaScript"
)
# Scatter plot of jobs against users for every language, both axes on a log10
# scale, coloured by whether the title appears in I_know_well or
# have_worked_with. `label = title` is mapped even though geom_point() does
# not draw it; presumably it is there for the plotly hover text -- confirm.
p <- ggplot(
  data = languages |>
    select(title, number_of_jobs, number_of_users) |>
    mutate(I_know = is.element(title, c(I_know_well, have_worked_with))),
  mapping = aes(
    x = number_of_jobs,
    y = number_of_users,
    label = title,
    color = I_know
  )
) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "Number of Jobs (log scale)",
    y = "Number of Users (log scale)",
    color = "hswerdfe Knows"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)